################################################################################################
# This script takes the raw data from Qualtrics and recodes some of the QV/SV variables.
# It also filters out subjects who did not complete the task. Lastly, it splits the results
# by the condition (QV or SV) to which each subject was assigned.
# 
# Exports the "raw" data frames for QV and SV (no timing information is filtered),
# as well as the cleaned, recoded data frames for each treatment.
# 
# Author: Luis S.
# Last Modified: 7.23.16
################################################################################################

library (XML)
library (dplyr)

# setwd("C:/Users/Luis/Dropbox/Research/Casella/Storable Votes (MTurk) - California Experiment/California Experiment Results/")
# Parse the Qualtrics XML export into a data frame, keeping text columns as
# character vectors instead of factors. TRUE/FALSE are spelled out because the
# T/F shorthands are ordinary variables that can be reassigned.
participantData_raw <- xmlToDataFrame(
  "California Stage 2 - SV & QV Data/Raw Data/California_MTurk__SV__QV_Stage_2.xml",
  stringsAsFactors = FALSE
)

####################################
##### Qualtrics Pre-Processing #####
####################################
# Keep only the subjects whose Q_TerminateFlag is "Success" (everyone else was
# kicked out of the survey), then keep the subject ID, total duration, the
# referendum columns, every QV/SV column, the assigned treatment and the four
# preference columns.
participantData <- participantData_raw %>%
  filter(Q_TerminateFlag == "Success") %>%
  select(MID, Q_TotalDuration, matches("Ref\\d"), matches("QV"), matches("SV"),
         VoteTreatment, EducationPref, TeachersPref, BondsPref, ImmigrationPref)

# Split the filtered data by assigned treatment (VoteTreatment) and export each
# subset as "raw". Each subset keeps only the columns of its own treatment
# (names matching "QV" or "SV"); VoteTreatment itself is dropped because it is
# constant within a subset.
QV_raw <- participantData %>%
  filter(VoteTreatment == "QV") %>%
  select(MID, Q_TotalDuration, matches("Ref\\d"), matches("QV"),
         EducationPref, TeachersPref, BondsPref, ImmigrationPref)

SV_raw <- participantData %>%
  filter(VoteTreatment == "SV") %>%
  select(MID, Q_TotalDuration, matches("Ref\\d"), matches("SV"),
         EducationPref, TeachersPref, BondsPref, ImmigrationPref)

# row.names = FALSE keeps the automatic row index out of the CSV files.
write.csv(QV_raw,
          "California Stage 2 - SV & QV Data/Raw Data/California_MTurk_QV_Stage_2_raw.csv",
          row.names = FALSE)
write.csv(SV_raw,
          "California Stage 2 - SV & QV Data/Raw Data/California_MTurk_SV_Stage_2_raw.csv",
          row.names = FALSE)

#####################################
### Data Cleaning & Preprocessing ###
#####################################

####### SV ########

# Keep the subject ID, every column whose name matches one of the four issues
# (the referendum answers and the preference columns), the total duration and
# the two SV vote columns; everything else is dropped.
SV_clean <- select(SV_raw, MID, matches("Educ"), matches("Teacher"), matches("Bonds"), matches("Immigration"),
                   Q_TotalDuration, matches("SV\\d_Vote_11_Group"))

# Give the two vote columns shorter names.
SV_clean <- rename(SV_clean, SV1_Vote = SV1_Vote_11_Group, SV2_Vote = SV2_Vote_11_Group)

# Recode the trailing numeric code of each vote into the name of the issue.
# Patterns are applied in the listed order; every replacement ends in a letter,
# so an already recoded value can never be matched by a later "<digit>$" pattern.
sv_issue_codes <- c("0$" = "BilingualEduc",
                    "1$" = "TeacherTenure",
                    "2$" = "PublicBonds",
                    "3$" = "Immigration")
for (vote_col in c("SV1_Vote", "SV2_Vote")) {
  for (code in names(sv_issue_codes)) {
    SV_clean[[vote_col]] <- gsub(code, sv_issue_codes[[code]], SV_clean[[vote_col]])
  }
}

# Recode the referendum answers. Order matters: "-1" has to be replaced before
# "1", otherwise the "1" inside "-1" would be recoded first.
# NOTE(review): the patterns are unanchored, so they would also rewrite these
# digits inside any longer value -- assumes the columns only hold -1/0/1;
# TODO confirm against the raw export.
ref_answer_codes <- c("-1" = "Against", "0" = "Abstain", "1" = "InFavor")
for (ref_col in c("Ref1_BilingualEduc", "Ref2_TeacherTenure",
                  "Ref3_PublicBonds", "Ref4_Immigration")) {
  for (code in names(ref_answer_codes)) {
    SV_clean[[ref_col]] <- gsub(code, ref_answer_codes[[code]], SV_clean[[ref_col]])
  }
}

# row.names = FALSE keeps the automatic row index out of the CSV file.
write.csv(SV_clean,
          "California Stage 2 - SV & QV Data/Clean Data/California_MTurk_SV_Stage_2_clean.csv",
          row.names = FALSE)



####### QV ########

# Keep the subject ID, every column whose name matches one of the four issues
# (the referendum answers and the preference columns), the total duration and
# the QV class and vote columns; everything else is dropped.
QV_clean <- select(QV_raw, MID, matches("Educ"), matches("Teacher"), matches("Bonds"), matches("Immigration"),
                   Q_TotalDuration, matches("QV\\d_Class"), matches("QV\\d_Vote"))

# Recode the chosen vote classes. The two class columns use different numeric
# codes (7..4 for QV1_Class, 4..1 for QV2_Class) for the same four labels, so
# each column has its own lookup table. Patterns are applied in the listed
# order; every replacement ends in a letter, so an already recoded value can
# never be matched by a later "<digit>$" pattern.
qv_class_codes <- list(
  QV1_Class = c("7$" = "(1) Red", "6$" = "(2) Yellow", "5$" = "(3) Green", "4$" = "(4) Blue"),
  QV2_Class = c("4$" = "(1) Red", "3$" = "(2) Yellow", "2$" = "(3) Green", "1$" = "(4) Blue")
)
for (class_col in names(qv_class_codes)) {
  class_codes <- qv_class_codes[[class_col]]
  for (code in names(class_codes)) {
    QV_clean[[class_col]] <- gsub(code, class_codes[[code]], QV_clean[[class_col]])
  }
}

# Recode the referendum answers (voting sides). Order matters: "-1" has to be
# replaced before "1", otherwise the "1" inside "-1" would be recoded first.
# NOTE(review): the patterns are unanchored, so they would also rewrite these
# digits inside any longer value -- assumes the columns only hold -1/0/1;
# TODO confirm against the raw export.
ref_answer_codes <- c("-1" = "Against", "0" = "Abstain", "1" = "InFavor")
for (ref_col in c("Ref1_BilingualEduc", "Ref2_TeacherTenure",
                  "Ref3_PublicBonds", "Ref4_Immigration")) {
  for (code in names(ref_answer_codes)) {
    QV_clean[[ref_col]] <- gsub(code, ref_answer_codes[[code]], QV_clean[[ref_col]])
  }
}

# Rename the per-issue vote columns: the numeric suffix (1-4) is replaced by
# the name of the issue it stands for.
QV_clean <- rename(QV_clean,
                   QV1_Vote_BilingualEduc = QV1_Vote_1, QV2_Vote_BilingualEduc = QV2_Vote_1,
                   QV1_Vote_TeacherTenure = QV1_Vote_2, QV2_Vote_TeacherTenure = QV2_Vote_2,
                   QV1_Vote_PublicBonds = QV1_Vote_3, QV2_Vote_PublicBonds = QV2_Vote_3,
                   QV1_Vote_Immigration = QV1_Vote_4, QV2_Vote_Immigration = QV2_Vote_4)

# row.names = FALSE keeps the automatic row index out of the CSV file.
write.csv(QV_clean,
          "California Stage 2 - SV & QV Data/Clean Data/California_MTurk_QV_Stage_2_clean.csv",
          row.names = FALSE)


